###########################################
## January 2025 Replication Code ##########
## Innovation and Interdependence: ########
## Evidence from Gene-Editing Technology ##
###########################################

rm(list = ls())
dev.off()
library(tidyverse)
setwd("~/Dropbox/Genes/Data/Replication Files Final")

#######################
## Descriptive Plots ##
#######################

## Figure 2: National Regulation of Gene-Editing Technology

library(rnaturalearth)
regs <- read_csv("GeneRegs_Cloropleth.csv")

world <- ne_countries(scale = "medium", returnclass = "sf")
world <- world %>% mutate(country = sovereignt)
world <- world %>%left_join(regs, by  ="country")

ggplot(data = world) + 
  geom_sf(color = "black", 
          aes(fill = gene_reg_score),
          lwd =1/7)+
  coord_sf()+
  theme_bw()+
  labs(fill = "Permissive - Restrictive")+
  scale_fill_distiller(palette = "Reds",
                       na.value = "transparent",
                       direction = 1)+
  theme(legend.position = "bottom",      
        plot.title = element_text(size = 17),
        legend.text = element_text(size =12))


###########################################
## Employment of Gene-Editing Scientists ##
###########################################

## Read in scientific movement data
scientists <- read.csv("GeneDyadYear.csv")

## define function to cluster SE
cl <- function(fm, cluster){ 
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  if(length(cluster) != length(fm$fitted.values)){cluster <- cluster[-fm$na.action]}
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }


## Table 1: Employment Relocation of Gene Researchers
## Think we need a couple small adjustments to paper here

FullModel <- lm(move_count ~  RegulatoryDifference*CRISPR + homestay + 
                  GDP_log_origin + GDP_log_destination + 
                  GDPpc_log_origin + GDPpc_log_destination +
                  patents_tot_log_origin + patents_tot_log_destination + 
                  RDexpend_tot_log_origin + RDexpend_tot_log_destination + 
                  n_unviersities_country+n_unviersities_origin_country+
                  mean_rank_country+mean_rank_origin_country,
                data=scientists)
FM.se <- cl(FullModel, scientists$dyadID)

DyadFE <- lm(move_count ~  RegulatoryDifference*CRISPR + homestay + 
                  GDP_log_origin + GDP_log_destination + 
                  GDPpc_log_origin + GDPpc_log_destination +
                  patents_tot_log_origin + patents_tot_log_destination + 
                  RDexpend_tot_log_origin + RDexpend_tot_log_destination + 
                  n_unviersities_country+n_unviersities_origin_country+
                  mean_rank_country+mean_rank_origin_country+
                  dyadID,
                data=scientists)
DF.se <- cl(DyadFE, scientists$dyadID)

dat.FM <- scientists[-FullModel$na.action,]
NoControls <- lm(move_count ~  RegulatoryDifference*CRISPR, data=dat.FM)
NC.se <- cl(NoControls, dat.FM$dyadID)

library(stargazer)
stargazer(NoControls, FullModel, DyadFE, 
          se=list(NC.se[,2], FM.se[,2], DF.se[,2]),
          omit = "dyadID")


#######################################
## Coauthorship, Trials, and Patents ##
#######################################

## Table 2: Effect of National Regulations on Coauthorship, 
# Clinical Trials, Patents.

## Read in country-year level data
load("Coauthorships.Rdata")

## Coauthors 
CA <- lm(percent_articles_intl_100 ~ gene_reg_score*CRISPR + 
           GDP_log + GDPpc_log + patents_tot_ln + RDexpenditure_tot_log + 
           as.factor(country) + n_universities+ mean_rank,
          data=CY.co)
CA.se <- cl(CA, CY.co$country_year) 

## Clinical Trials 
CT <- lm(trials ~ gene_reg_score*CRISPR + 
           GDP_log + GDPpc_log + patents_tot_ln + RDexpenditure_tot_log+ +
           n_universities + mean_rank + as.factor(country), 
          data=CY.co)
CT.se <- cl(CT, CY.co$country_year)

## Patents
PT <- lm(gene_patents ~ gene_reg_score*CRISPR + 
            GDP_log + GDPpc_log + patents_tot_ln + RDexpenditure_tot_log + 
           as.factor(country) + n_universities + mean_rank,
          data=CY.co)
PT.se <- cl(PT, CY.co$country_year)

stargazer(CA, CT, PT, 
          se=list(CA.se[,2], CT.se[,2], PT.se[,2]),
          omit = "as.factor")


####################
## Survey results ##
####################

survey <- read.csv("GeneSurvey.csv")
survey <- survey %>%
  mutate(gene_treat = factor(gene_treat, levels = c("control","china","uk","us")))

## Figure 4: Public Response to Gene-Editing Controversy

rbind(
  broom::tidy(lm(support_reg ~ gene_treat, survey))%>%mutate(model = "Regulation"),
  broom::tidy(lm(fund_gene ~ gene_treat, survey))%>%mutate(model = "Funding"),
  broom::tidy(lm(support_med ~ gene_treat, survey))%>%mutate(model = "Patient Access"),
  broom::tidy(lm(safe ~ gene_treat, survey))%>%mutate(model = "Safety"))%>%
  filter(!term == "(Intercept)")%>%
  mutate(term = case_when(term == "gene_treatuk"~"Foreign Controversy - UK",
                          term == "gene_treatus"~"Domestic Controversy",
                          term == "gene_treatchina"~"Foreign Controversy - China"),
         term = factor(term, levels = c("Foreign Controversy - China","Foreign Controversy - UK","Domestic Controversy")))%>%
  ggplot(aes(x = estimate,y = reorder(term,model), color = term))+
  geom_point(position = position_dodge(0.5))+
  geom_errorbar(aes(xmin = estimate-1.96*std.error,
                    xmax = estimate+1.96*std.error),
                position = position_dodge(0.5),
                width = 0)+
  facet_wrap(~factor(model, levels = c("Regulation","Patient Access", "Funding", "Safety")),nrow = 4,strip.position = "left",)+
  theme_bw()+
  theme(legend.title = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "top",
        legend.background = element_rect(color = "black", linewidth = 0.3),
        legend.direction = "vertical",
        strip.background = element_blank(),
        strip.text = element_blank(),
        strip.text.x = element_blank(),panel.grid = element_blank(),
  )+
  guides(colour = guide_legend(reverse=T))+
  labs(x = "",
       y = "",
       color = "")+
  geom_vline(xintercept = 0, linetype = "dotted", color = "black")+
  scale_color_manual(values = c("darkgreen","blue","red"))+
  geom_text(dat = tibble(model = c("Regulation","Patient Access", "Funding", "Safety"),
                         term = c("Foreign Controvery - China",
                                  "Foreign Controvery - China",
                                  "Foreign Controvery - China",
                                  "Foreign Controvery - China")),
            aes(label = model, x = -1.75, y = term), color = "black",
            size =4)+
  xlim(-2,0.5)

#########################
## Appendix Tables and ##
## Figures ##############
#########################

## Table A1: Placebo Test: Employment Relocation of Mental Health and 
# Eating Disorder Researchers

## We need to adjust paper here

PT_FullModel <- lm(move_count_MH ~  RegulatoryDifference*CRISPR + homestay + 
                  GDP_log_origin + GDP_log_destination + 
                  GDPpc_log_origin + GDPpc_log_destination +
                  patents_tot_log_origin + patents_tot_log_destination + 
                  RDexpend_tot_log_origin + RDexpend_tot_log_destination + 
                  n_unviersities_country+n_unviersities_origin_country+
                  mean_rank_country+mean_rank_origin_country,
                data=scientists)
PT_FM.se <- cl(PT_FullModel, scientists$dyadID)

PT_DyadFE <- lm(move_count_MH ~  RegulatoryDifference*CRISPR + homestay + 
                  GDP_log_origin + GDP_log_destination + 
                  patents_tot_log_origin + patents_tot_log_destination + 
                  RDexpend_tot_log_origin + RDexpend_tot_log_destination + 
                  n_unviersities_country + n_unviersities_origin_country +
                  mean_rank_country+mean_rank_origin_country +
                  dyadID,
             data=scientists)
PT_DF.se <- cl(PT_DyadFE, scientists$dyadID)

dat.FM <- scientists[-PT_FullModel$na.action,]
PT_NoControls <- lm(move_count_MH ~  RegulatoryDifference*CRISPR, data=dat.FM)
PT_NC.se <- cl(PT_NoControls, dat.FM$dyadID)

library(stargazer)
stargazer(PT_NoControls, PT_FullModel, PT_DyadFE, 
          se=list(PT_NC.se[,2], PT_FM.se[,2], PT_DF.se[,2]),
          omit = "dyadID")


## Table A2: Survey sample statistics.

prop.table(table(survey$partyID))
prop.table(table(survey$age_cat))
prop.table(table(survey$edu_cat))
prop.table(table(survey$gender))
prop.table(table(survey$ethnicity_cat))
prop.table(table(survey$hispanic %in% c(2:14, 16)))
prop.table(table(survey$income_cat))
prop.table(table(survey$region_cat))


# Table A3: Survey Results.

Regulation <- estimatr::lm_robust(support_reg ~ gene_treat, survey)
Access <- estimatr::lm_robust(support_med ~ gene_treat, survey)
Funding <- estimatr::lm_robust(fund_gene ~ gene_treat, survey)
Safety <- estimatr::lm_robust(safe ~ gene_treat, survey)

Regulation
Access
Funding
Safety

modelsummary::modelsummary(list(Regulation,Access,Funding,Safety), 
                           output = "latex",gof_map = c('r.squared', "adj.r.squared"), 
                           coef_rename = c("China Controversy", "UK Controversy",
                                           "US Controversy"), 
                           coef_omit = "(Intercept)")
  

# Table A4: Survey Experiment Results on Respondents who pass Manipulation Check.

dat_attention <- survey %>%
  filter(gene_treat == gene_MC1|gene_treat == "control")

Regulation_ATT <- estimatr::lm_robust(support_reg ~ gene_treat, dat_attention)
Access_ATT <- estimatr::lm_robust(support_med ~ gene_treat, dat_attention)
Funding_ATT <- estimatr::lm_robust(fund_gene ~ gene_treat, dat_attention)
Safety_ATT <- estimatr::lm_robust(safe ~ gene_treat, dat_attention)

Regulation_ATT
Access_ATT
Funding_ATT
Safety_ATT

modelsummary::modelsummary(list(Regulation_ATT, Access_ATT, Funding_ATT, Safety_ATT), 
                           output = "latex",gof_map = c('r.squared', "adj.r.squared"), 
                           coef_rename = c("China Controversy", "UK Controversy", "US Controversy"), 
                           coef_omit = "(Intercept)")   

modelsummary::modelsummary(
  list(Regulation_ATT, Access_ATT, Funding_ATT, Safety_ATT), 
  output = "latex",
  gof_map = c("adj.r.squared"), 
  coef_map = c("gene_treatus" = "US Controversy",
               "gene_treatuk" = "UK Controversy", 
               "gene_treatchina" = "China Controversy"), 
  coef_omit = "(Intercept)",
  stars = c('**' = 0.05, '*' = 0.01, '***' = 0.01)
)
                                                                                                                                             

# Figure A1: Gene-editing Patent Applications, 2012-2018:

orbit <- read.csv("orbit_patents.csv")

orbit%>%
  ggplot(aes(x = year, y = n, color = type, group = type))+
  geom_line(stat = "identity")+
  theme_bw()+
  theme(legend.title = element_blank())+
  labs(y = "# patents filed", x = "")

# Figure A2: AddGene Registered Researchers by Country of Origin:

add_gene <- read.csv("AddGene.csv")
add_gene <- add_gene[order(add_gene$total_plasmids),]

add_gene %>%
  filter(type == "request",!country == "")%>%
  ggplot(aes(x = reorder(country, total_plasmids), y = total_plasmids, fill = country))+
  geom_bar(stat = "identity")+
  labs(x = "", y = "Number of AddGene Registrants",
       title = "AddGene CRISPR plasmid registrants by country")+
  guides(fill = "none")+
  theme(axis.text.x = element_text(hjust = 1, angle = 75))



# Figure A3: AddGene depositors by Country of Origin.

add_gene %>%
  filter(type == "deposit", !country == "")%>%
  ggplot(aes(x = reorder(country, total_plasmids), y = total_plasmids, fill = country))+
  geom_bar(stat = "identity")+
  labs(x = "", y = "Number of AddGene Deposits",
       title = "AddGene CRISPR plasmid deposits by country")+
  guides(fill = "none")+
  theme_bw()+
  theme(axis.text.x = element_text(hjust = 1, angle = 90))
ggsave("/Users/coudry/Dropbox/Genes/Paper/addgene_deposits.png",
       last_plot(),dpi = 300)


# Figure A4: Distribution of Responses for Outcome Variables. 

floor_effect <- survey %>%
  mutate(support_reg = as.numeric(support_reg),
         support_med = as.numeric(support_med),
         safe = as.numeric(safe),
         fund_gene = as.numeric(fund_gene))%>%
  select(c(support_reg,
           support_med,
           safe,
           fund_gene,
           gene_treat))%>%
  pivot_longer(cols = c(support_reg,
                        support_med,
                        safe,
                        fund_gene),
               names_to = "outcome",
               values_to = "measure")

floor_effect <- floor_effect%>%
  mutate(outcome = as.factor(outcome))

levels(floor_effect$outcome) <- c("Funding",
                                  "Safety",
                                  "Patient Access",
                                  "Increase Regulation")

ggplot(floor_effect %>%
         mutate(is_control = ifelse(gene_treat == "control",T,F)), 
                aes(x = measure, group = gene_treat)) +
  geom_density(aes(linetype = gene_treat, color = gene_treat), adjust =1.3) +
  scale_color_manual(values =c("grey","black","black","black")) +
  facet_wrap(~outcome)+
  theme_bw()+
  labs(linetype = "Treatment",
       color = "Treatment",
       x = "Scale (1 - Completely Disagree,10 - Completely Agree)",
       y = "Density of responses",
       title = "Distribution of responses for each outcome measure",
       subtitle = "By treatment condition")


# Figure A.4.1: CRISPR tweet sentiment: 

crispr <- read.csv("crispr_tweets.csv")
crispr$time <- crispr$time%>%lubridate::ymd()

# Sentiment Panel
t1 <- crispr%>%
  filter(lang == "en")%>%
  group_by(time)%>%
  mutate(sentiment = mean(sentiment, na.rm=T))%>%
  ggplot(aes(x = time,
             y = sentiment))+
  geom_line(stat = "identity")+
  geom_vline(xintercept = lubridate::ymd("2018-11-26"),
             linetype = "dashed")+
  xlim(lubridate::ymd("2018-09-26"),lubridate::ymd("2019-01-26"))+
  ylim(0,1)+
  theme_bw()+
  labs(x ="",
       y ="Sentiment (average)")

# Moral outrage panel
t2 <- crispr %>%
  filter(lang == "en", 
         time> lubridate::ymd("2018-09-26"), 
         time <lubridate::ymd("2019-01-26"))%>%
  group_by(time)%>%
  summarise(mean = mean(moral_outrage))%>%
  ggplot(aes(x = time, y = mean))+
  geom_line(stat = "identity")+
  geom_vline(xintercept = lubridate::ymd("2018-11-26"),
             linetype = "dashed")+
  xlim(lubridate::ymd("2018-09-26"),lubridate::ymd("2019-01-26"))+
  ylim(0, 0.5)+
  theme_bw()+
  labs(x ="",
       y ="Moral outrage (avg # words/tweet)")

# Tweet histogram Panel
t3 <- crispr%>%
  filter(lang == "en")%>%
  ggplot(aes(x = time))+
  geom_histogram(fill = "white", color = "black",
                 bins = 100)+
  xlim(lubridate::ymd("2018-09-26"),lubridate::ymd("2019-01-26"))+
  theme_bw()+
  labs(x ="",
       y ="# tweets")

library(patchwork)

t1/t2/t3+
  plot_layout(heights = unit(c(2, 4,3), c('cm','cm', 'cm')))

# Table A.4.1: Change in Tweet Sentiment.

crispr$post <- ifelse(crispr$time > lubridate::ymd("2018-11-26"),1,0)
m1 <- estimatr::lm_robust(sentiment~post, crispr%>% filter(time >lubridate::ymd("2018-10-28"),
                                                           time <lubridate::ymd("2018-12-28")))
m2 <- estimatr::lm_robust(moral_outrage~post, crispr%>% filter(time >lubridate::ymd("2018-10-28"),
                                                               time <lubridate::ymd("2018-12-28")))

modelsummary::modelsummary(list(m1,m2), output = "latex")


# Figure A.4.2:CRISPR tweets by location

sentiment <- read.csv("sentiment.csv")
sentiment$time <- sentiment$time%>%lubridate::ymd()
sentiment%>%
  filter(!is.na(location),
         location %in% c("USA","Switzerland","Netherlands","UK",
                         "Japan","Italy","Germany","India",
                         "France","Canada","Australia","España"))%>%
  ggplot(aes(x = time,
             y = sentiment))+
  geom_point(size =0.1)+
  geom_vline(xintercept = lubridate::ymd("2018-10-28"),
             linetype = "dashed")+
  facet_wrap(~location)+
  xlim(lubridate::ymd("2018-09-01"),lubridate::ymd("2019-01-01"))+
  ylim(0,1)+
  geom_smooth(method = "loess")+
  theme_bw()+
  labs(x ="",
       y ="Sentiment (average)")


# Table A.4.2: Tweet Virality.

# Models 1 -4
ro <- estimatr::lm_robust(retweet_count~moral_outrage, crispr)
lo <- estimatr::lm_robust(like_count~moral_outrage, crispr)
rs <- estimatr::lm_robust(retweet_count~sentiment, crispr)
ls <- estimatr::lm_robust(like_count~sentiment, crispr)

modelsummary::modelsummary(list(ro,lo,rs,ls))

# Models 5-8
crispr$time <- crispr$time%>%lubridate::ymd()
crispr <- crispr %>%mutate(post_he = ifelse(time> lubridate::ymd("2018-10-28"),1,0))

ro_temp <- estimatr::lm_robust(retweet_count~moral_outrage*post_he, crispr)
lo_temp <- estimatr::lm_robust(like_count~moral_outrage*post_he, crispr)
rs_temp <- estimatr::lm_robust(retweet_count~sentiment*post_he, crispr)
ls_temp <- estimatr::lm_robust(like_count~sentiment*post_he, crispr)

modelsummary::modelsummary(list(ro_temp,lo_temp,rs_temp,ls_temp), output="latex")


# Figure A.5.1: Regulations and Scientific Retractions.

retractions <- read.csv("retracted.csv")

#number retracted
retractions%>%
  na.omit()%>%
  ggplot(aes(x = year, y = n_retracted,
             color = high_reg))+
  geom_point(position = "jitter",
             size =1)+
  theme_bw()+
  labs(x = "",
       y = "N retracted",
       color = "")+
  theme(axis.text.x = element_text(angle = 75, hjust = 1),
        legend.position = "bottom")


#prop retracted
retractions%>%
  na.omit()%>%
  ggplot(aes(x = year, y = prop_retracted,
             color = high_reg))+
  geom_point(position = "jitter",
             size =1)+
  theme_bw()+
  labs(x = "",
       y = "Proportion retracted",
       color = "")+
  theme(axis.text.x = element_text(angle = 75, hjust = 1),
        legend.position = "bottom")





